import re
import csv
import requests

# HTTP headers sent with every request; the User-Agent is a desktop Chrome
# browser string rather than the default one the HTTP library would send.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36"
    ),
}


def _first_match(pattern, text, default=""):
    """Return group 1 of the first match of *pattern* in *text*, else *default*."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def getbookslist(url, writer=None):
    """Download the page at *url* and write one CSV row per book entry found.

    Book entries are the contents of ``<td valign="top">`` cells. For each
    cell the title, the ``<p class="pl">`` text, the rating and the number of
    ratings are extracted with regular expressions and written as a dict with
    the keys "书名", "简介", "评分" and "评价人数".

    Args:
        url: Address of the listing page to fetch.
        writer: Object with a ``writerow(dict)`` method (e.g. a
            ``csv.DictWriter``). When omitted, the module-level ``dictwriter``
            is used, which must exist by the time this function is called.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    if writer is None:
        # Looked up at call time so the script can create it after this def.
        writer = dictwriter

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=head, timeout=10)
    # Fail loudly on error pages instead of silently parsing them into nothing.
    response.raise_for_status()

    # re.S lets a cell span several lines; the per-field patterns below run
    # without it, so "." in them never crosses a line break.
    for cell in re.findall(r'<td valign="top">(.*?)</td>', response.text, re.S):
        title = _first_match(r'title="(.*?)"', cell, default=None)
        if title is None:
            # A cell without a title attribute is not a book entry.
            continue
        votes = _first_match(
            r'<span class="pl">\(\n(.*?)人评价\n.*\)</span>', cell
        )
        writer.writerow({
            "书名": title,
            # Missing optional fields become empty strings rather than
            # aborting the whole run with an IndexError.
            "简介": _first_match(r'<p class="pl">(.*?)</p>', cell),
            # Non-greedy so a second </span> on the same line is not captured.
            "评分": _first_match(r'<span class="rating_nums">(.*?)</span>', cell),
            # The count is surrounded by indentation spaces in the markup.
            "评价人数": votes.replace(" ", ""),
        })


# Column names of the output CSV, in order; they must match the keys of the
# row dicts that getbookslist writes.
mydictheader = ["书名", "简介", "评分", "评价人数"]
with open("D:/JetBrains Projects/Pycharm Projects/python-learn/爬虫作业/豆瓣图书/豆瓣图书.csv", "a", newline="", encoding="utf-8") as f:
    # Module-level on purpose: getbookslist reads this name as a global.
    dictwriter = csv.DictWriter(f, mydictheader)
    # The file is opened in append mode, so only write the header while the
    # file is still empty; otherwise each re-run would insert another header
    # row in the middle of the data.
    if f.tell() == 0:
        dictwriter.writeheader()
    # Request ten pages, addressed by a start offset of 0, 25, ..., 225.
    for i in range(10):
        url = f"https://book.douban.com/top250?start={25*i}"
        getbookslist(url)
